# ---- Session setup ----
# Start from an empty global environment so objects left over from an earlier
# session cannot leak into the table built by this script.
rm(list = ls())
# A large scipen penalty makes R print numbers in fixed rather than scientific
# notation.
options(scipen = 999)
gc()

# Packages attached by this script.
packages <- c("tidyverse", "readstata13", "xtable", "stringi")

# Install whichever of them are not yet present in the library.
new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages) > 0) install.packages(new.packages)

# Attach with library() rather than require(): require() only returns FALSE
# when a package cannot be loaded, so the script would carry on and fail later
# with a misleading "could not find function" error. library() stops here.
invisible(lapply(packages, library, character.only = TRUE))
rm(packages, new.packages)

# Project root; the relative "./Datasets/..." paths are resolved against it.
setwd("PUT YOUR DIRECTORY HERE")

## load data
data <- read.csv("./Datasets/panel_bare_bones.csv")

## clean data
# Drop columns that are not used for the summary table.
data$X <- NULL
data$locality_year <- NULL
data$subbotnik_inkind <- NULL
# Lower-case the merge key.
# NOTE(review): `locality` in main_confounders.csv is used as-is in the merge;
# presumably it is already lower case -- confirm, otherwise rows will silently
# fail to match.
data$locality <- as.character(tolower(data$locality))


# load covariates, including spy priests
# Each column is listed exactly once: selecting "pop" twice made the data-frame
# subset create an extra, unused `pop.1` column in the merged data.
confounder_cols <- c(
  "locality", "region", "pop", "schools", "shops", "restaurants", "cinemas",
  "Frac48", "coal_sqm", "minerals", "partition", "priests_continuous",
  "coal_mines", "delegates", "protests_40s", "sabotage_40s", "terror_40s",
  "income_76_capita", "income_77_capita"
)
confounders <- read.csv("./Datasets/main_confounders.csv")[, confounder_cols]

# Left join: keep every row of the panel, matched or not.
data <- merge(data, confounders, by = "locality", all.x = TRUE)

# some cleaning
# Integer code for each distinct region (factor level index).
data$region_numeric <- as.numeric(as.factor(data$region))
# Indicator: 1 when the partition is "Russian", 0 otherwise (NA stays NA).
data$russian <- ifelse(data$partition == "Russian", 1, 0)
# NOTE(review): `$` partially matches data-frame column names, so this also
# resolves if the real column name merely starts with "subbotnik_incash_".
# Confirm the exact column name and spell it out in full.
data$subbotnik <- data$subbotnik_incash_
# Shorter aliases used in the summary table.
data$priests <- data$priests_continuous
data$coal <- data$coal_sqm

# summarize
# One-row data frame holding min / max / mean / sd / non-missing count of every
# table variable, in columns named "<variable>_<statistic>".
data.sum <- data %>%
  dplyr::select(
    commanders, strikes, subbotnik, priests, pop, schools, shops, restaurants,
    cinemas, Frac48, coal, minerals, russian, coal_mines, delegates,
    protests_40s, sabotage_40s, terror_40s, income_76_capita, income_77_capita
  ) %>%
  # Income is the row-wise average of the two yearly per-capita figures, using
  # whichever of them is available. rowMeans() is the vectorised equivalent of
  # a rowwise() mean (NaN when both years are missing).
  dplyr::mutate(
    income = rowMeans(cbind(income_76_capita, income_77_capita), na.rm = TRUE)
  ) %>%
  dplyr::select(-c(income_76_capita, income_77_capita)) %>%
  # across() replaces the deprecated summarise_each()/funs() pair; a named list
  # of functions yields the same "<column>_<function>" output names.
  dplyr::summarise(dplyr::across(
    dplyr::everything(),
    list(
      min = function(x) min(x, na.rm = TRUE),
      max = function(x) max(x, na.rm = TRUE),
      mean = function(x) mean(x, na.rm = TRUE),
      sd = function(x) sd(x, na.rm = TRUE),
      n = function(x) sum(!is.na(x))
    )
  ))


# reshape it
# Turn the one-row summary ("<variable>_<statistic>" columns) into one row per
# variable with one column per statistic. The name is split at its LAST
# underscore because variable names themselves contain underscores
# (e.g. "protests_40s_min" -> var "protests_40s", stat "min").
# Row order is not significant here; the rows are re-ordered explicitly
# further down.
data.stats.tidy <- data.sum %>%
  tidyr::pivot_longer(
    cols = dplyr::everything(),
    names_to = c("var", "stat"),
    names_pattern = "^(.*)_([^_]+)$",
    values_to = "value"
  ) %>%
  tidyr::pivot_wider(names_from = stat, values_from = value) %>%
  # The non-missing count `n` is computed above but not reported in the table.
  dplyr::select(var, min, max, mean, sd)
  

# clean with digits
# Maxima are rounded to whole numbers; means and standard deviations keep five
# decimals. Columns are addressed by name (max is the 3rd column, mean and sd
# the 4th and 5th).
data.stats.tidy$max <- round(data.stats.tidy$max, digits = 0)
data.stats.tidy[c("mean", "sd")] <- lapply(
  data.stats.tidy[c("mean", "sd")],
  round,
  digits = 5
)

# rename variables
# Map every raw variable name to its display label BY NAME. Assigning labels
# positionally after sorting is fragile: the sort order of character vectors is
# locale-dependent (in the C locale "Frac48" sorts before all lower-case
# names), which would silently attach labels to the wrong rows.
var_labels <- c(
  cinemas      = "Cinemas",
  coal         = "Coal (t)",
  coal_mines   = "Coal mines",
  commanders   = "Officers",
  delegates    = "Solidarnosc Delegates",
  Frac48       = "Ethnicity (frac.)",
  income       = "Income (zl)",
  minerals     = "Minerals (%)",
  pop          = "Population",
  priests      = "Corrupted priests",
  protests_40s = "Protests 1940s",
  restaurants  = "Restaurants",
  russian      = "Russian",
  sabotage_40s = "Sabotage 1940s",
  schools      = "Schools",
  shops        = "Shops",
  strikes      = "Strikes",
  subbotnik    = "Subbotnik (zl)",
  terror_40s   = "Terror 1940s"
)

# Fail loudly if the summary contains a variable that has no label.
unlabelled <- setdiff(data.stats.tidy$var, names(var_labels))
if (length(unlabelled) > 0) {
  stop(
    "No display label defined for: ", paste(unlabelled, collapse = ", "),
    call. = FALSE
  )
}
data.stats.tidy$var <- unname(var_labels[data.stats.tidy$var])

# order sensibly
# Row order of the final table, expressed in display labels.
order <- c("Strikes", "Subbotnik (zl)", "Officers", "Corrupted priests", "Cinemas", "Coal (t)",
           "Ethnicity (frac.)", "Minerals (%)", "Population", "Restaurants", "Russian", "Schools",
           "Shops", "Income (zl)", "Coal mines", "Solidarnosc Delegates", "Protests 1940s",
           "Sabotage 1940s", "Terror 1940s")

data.stats.tidy <- data.stats.tidy %>%
  slice(match(order, var))

# Period covered by each variable, one entry per table row, in the row order
# established above.
time_periods <- c(
  "1980 - 86",  # Strikes
  "1975 - 79",  # Subbotnik (zl)
  "1945 - 89",  # Officers
  "1949 - 56",  # Corrupted priests
  "1975",       # Cinemas
  "1975",       # Coal (t)
  "1948",       # Ethnicity (frac.)
  "2005",       # Minerals (%)
  "1975",       # Population
  "1975",       # Restaurants
  "1975",       # Russian
  "1975",       # Schools
  "1975",       # Shops
  "1976 - 77",  # Income (zl)
  "1975 - 86",  # Coal mines
  "1981 - 86",  # Solidarnosc Delegates
  "1946 - 48",  # Protests 1940s
  "1946 - 48",  # Sabotage 1940s
  "1946 - 48"   # Terror 1940s
)
data.stats.tidy$Time <- time_periods

# Final column headers; the label column is left untitled.
names(data.stats.tidy) <- c("", "Min", "Max", "Mean", "SD", "Time")

# save
# Write the table as LaTeX. `type` and `file` are arguments of the print
# method, not of xtable(); the original call closed print.xtable() right after
# `type`, leaving `file = ...` outside any call, which is a syntax error.
#
# `digits` needs one entry for the row names plus one per column:
# row names, label, Min, Max, Mean, SD, Time.
# NOTE(review): as written, Mean is printed with 0 decimals and SD with 5,
# although both were rounded to 5 decimals earlier -- confirm whether Mean
# should also use 5.
print(
  xtable(data.stats.tidy, digits = c(0, 0, 0, 0, 0, 5, 1)),
  type = "latex",
  file = "PUT YOUR FILEPATH HERE"
)


